'''
Author: JiraiyaChen 49619401+JiraiyaChen@users.noreply.github.com
Date: 2023-11-21 16:38:04
LastEditors: JiraiyaChen 49619401+JiraiyaChen@users.noreply.github.com
LastEditTime: 2023-12-06 15:46:29
Description: 
'''
import scrapy
from scrapy import Selector, Request
from spider2107.items import TestItem
import json

class DoubanSpider(scrapy.Spider):
    """Scrape detail pages of xc8866.cc whose paths are listed in ``nanjin.json``.

    Each page yields one ``TestItem`` holding the page title, the image
    URLs, and eight values read from the cells of the table inside the
    page's ``<blockquote>``.
    """

    name = "test"
    allowed_domains = ["xc8866.cc"]
    # NOTE(review): start_requests() is overridden below, so Scrapy's default
    # scheduling from start_urls should not apply here -- confirm before removing.
    start_urls = ["https://xc8866.cc"]

    # Item fields in the order their values appear in the table cells:
    # cell 0 -> 'loc', cell 1 -> 'year', ... cell 7 -> 'phone'.
    _TABLE_FIELDS = (
        "loc",
        "year",
        "point",
        "money",
        "server",
        "wx",
        "qq",
        "phone",
    )

    def start_requests(self):
        """Yield one ``Request`` per URL fragment found in ``nanjin.json``.

        The file is read from the current working directory and must
        contain an iterable of entries, each with a ``"url"`` key whose
        value is iterated to obtain the path fragments.
        """
        # JSON is UTF-8 by specification; pin the encoding so the read does
        # not depend on the platform's default locale encoding.
        with open('nanjin.json', 'r', encoding='utf-8') as file:
            # Load the JSON data from the file.
            data = json.load(file)
        for entry in data:
            # presumably entry["url"] is a list of relative paths -- a plain
            # string here would be iterated character by character; verify
            # against the producer of nanjin.json.
            for url in entry["url"]:
                yield Request(url=f'https://xc8866.cc/{url}')

    def parse(self, response):
        """Extract a ``TestItem`` from a detail page.

        Fields whose table cell is absent from the page are set to
        ``None`` instead of aborting the whole item with ``IndexError``.
        """
        test_item = TestItem()
        # Title
        test_item['title'] = response.xpath("//h4/text()").get()
        # Images
        test_item['imgs'] = response.css("img.img-fluid").xpath("@src").getall()

        # Table cells inside the blockquote, in document order.
        tds = response.xpath("//blockquote//table//td")
        if len(tds) < len(self._TABLE_FIELDS):
            self.logger.warning(
                "Expected %d table cells but found %d on %s; "
                "missing fields are set to None",
                len(self._TABLE_FIELDS),
                len(tds),
                response.url,
            )

        for index, field in enumerate(self._TABLE_FIELDS):
            if index < len(tds):
                # First text node of a <span> inside the cell, or None.
                test_item[field] = tds[index].css("span::text").get()
            else:
                test_item[field] = None

        yield test_item
